netlitThis vignette shows examples of assessing bias in literature review networks based on covariates from metadata about the studies and authors included or excluded from the review. Specifically, for each study, we collect metadata on the lead author’s gender, H-Index, and total number of citations. We then assess the impact of selecting studies on covariates in two ways:
First, we subset the network (e.g., to studies where the lead author is a man) and observe how many nodes and edges are missing in these subsets. This reveals the contributions of underrepresented scholars to the network by showing what we lose if they are excluded.
Second, we draw random samples of 100 studies weighted by covariates. This simulates a literature review that is biased (e.g., toward scholars who are men or have many citations). We then compare these biased samples to an unweighted random sample of studies in the network.
literature_metadata %>%
filter(is.na(author_gender) | is.na(author_h_index)| is.na(author_citations)) %>%
kable()| id | author | year | publication | title | citations | outside_u_s | author_gender | author_h_index | author_citations |
|---|---|---|---|---|---|---|---|---|---|
| Wildgen & Engstrom 1980 | Wildgen & Engstrom | 1980 | Legislative Studies Quarterly | Spatial Distribution of Partisan Support and the Seats/Votes Relationship | 30 | NA | NA | NA | NA |
| Buchler 2005 | Buchler | 2005 | Journal of Theoretical Politics | Competition, Representation and Redistricting: The Case Against Competitive Congressional Districts | 81 | NA | M | NA | NA |
| Glazer et al. 1987 | Glazer et al. | 1987 | AJPS | Partisan and Incumbency Effects of 1970s Congressional Redistricting | 102 | NA | NA | NA | NA |
| Gay 2007 | Gay | 2007 | JOP | Legislating Without Constraints: The Effect of Minority Districting on Legislators’ Responsiveness to Constituency Preferences | 47 | NA | F | NA | NA |
| Bratton & Haynie 1999 | Bratton & Haynie | 1999 | JOP | Agenda Setting and Legislative Success in State Legislatures: The Effects of Gender and Race | 740 | NA | F | NA | NA |
| Wyrick 1991 | Wyrick | 1991 | American Politics Quarterly | Management of Political Influence: Gerrymandering in the 1980s | 10 | NA | M | NA | NA |
| Bullock 1995 | Bullock | 1995 | American Politics Quarterly | The Impact of Changing the Racial Composition of Congressional Districts on Legislators’ Roll Call Behavior | 63 | NA | M | NA | NA |
| Overby & Cosgrove 1996 | Overby & Cosgrove | 1996 | JOP | Unintended Consequences? Racial Redistricting and the Representation of Minority Interests | 164 | NA | M | NA | NA |
| Sharpe & Garand 2001 | Sharpe & Garand | 2001 | Political Research Quarterly | Race, Roll Calls, and Redistricting: The Impact of Race-Based Redistricting on Congressional Roll-Call | 48 | NA | M | NA | NA |
| LeVeaux & Garand 2003 | LeVeaux & Garand | 2003 | Social Science Quarterly | Race‐Based Redistricting, Core Constituencies, and Legislative Responsiveness to Constituency Change* | 13 | NA | F | NA | NA |
| Lyons & Galderisi 1995 | Lyons & Galderisi | 1995 | Political Research Quarterly | Incumbency, Reapportionment, and US House Redistricting | 43 | NA | M | NA | NA |
| Hirsch 2003 | Hirsch | 2003 | Election Law Journal | The United States House of Unrepresentatives: What Went Wrong in the Latest Round of Congressional Redistricting | 159 | NA | NA | NA | NA |
| Forgette & Winkle 2006 | Forgette & Winkle | 2006 | Social Science Quarterly | Partisan Gerrymandering and the Voting Rights Act | 16 | NA | M | NA | NA |
| Forgette & Platt 2005 | Forgette & Platt | 2005 | Political Geography | Redistricting Principles and Incumbency Protection in the US Congress | 33 | NA | M | NA | NA |
| Wong 2019 | Wong | 2019 | BJPS | Gerrymandering in Electoral Autocracies: Evidence from Hong Kong | 16 | 1 | NA | 11 | 454 |
library(ggraph)lit <- literature_long %>%
distinct(to, from) %>%
review()
lit## A netlit_review object with the following components:
##
## $edgelist
## - 69 edges
## - edge attributes: edge_betweenness
## $nodelist
## - 56 nodes
## - node attributes: degree_in, degree_out, degree_total, betweenness
## $graph
## an igraph object
# best seed 1,4, *5*
set.seed(5)
netlit_plot <- function(g){
ggraph(g, layout = 'fr') +
geom_node_point(
aes(color = degree_total %>% as.factor() ),
size = 6,
alpha = .7
) +
geom_edge_arc2(
aes(
start_cap = circle(3, 'mm'),
end_cap = circle(6, 'mm'),
color = edge_betweenness,
),
curvature = 0,
arrow = arrow(length = unit(2, 'mm'),
type = "open")
) +
geom_edge_loop(
aes( color = edge_betweenness,
start_cap = circle(5, 'mm'),
end_cap = circle(2, 'mm'),
),
n = 300,
strength = .6,
arrow = arrow(length = unit(2, 'mm'),
type = "open")
) +
geom_node_text( aes(label = name), size = 2.3) +
ggplot2::theme_void() +
theme(legend.position="bottom") +
labs(edge_color = "Edge Betweenness",
color = "Total Degree\nCentrality",
edge_linetype = "") +
scale_edge_color_viridis(discrete = FALSE,
option = "plasma",
begin = 0,
end = .9,
direction = -1) +
scale_color_viridis_d(option = "mako",
begin = 1,
end = .5)
}
g <- literature_long %>%
distinct(to, from) %>%
review() %>%
.$graph
g %>%
netlit_plot()# for plotting bias
netlit_bias_plot <- function(subgraph){
# lit with edge attribute indicating missing from subgraph
lit <- literature_long %>%
distinct(to, from) %>%
left_join( subgraph$edgelist %>% distinct(to, from) %>% mutate(missing_edges = "Not missing")
) %>%
mutate(missing_edges = replace_na(missing_edges, "Missing"))
lit %<>%
review(edge_attributes = names(lit))
# missing nodes
missing_nodes <- lit$nodelist$node[!lit$nodelist$node %in% subgraph$nodelist$node]
set.seed(5)
ggraph(lit$g, layout = 'fr') +
geom_node_point(
aes(color = ifelse(name %in% missing_nodes, "Missing", "Not Missing")),
size = 6,
alpha = .7
) +
geom_edge_arc2(
aes(
start_cap = circle(3, 'mm'),
end_cap = circle(6, 'mm'),
color = missing_edges,
),
curvature = 0,
arrow = arrow(length = unit(2, 'mm'),
type = "open")
) +
geom_edge_loop(
aes( color = missing_edges,
start_cap = circle(5, 'mm'),
end_cap = circle(2, 'mm'),
),
n = 300,
strength = .6,
arrow = arrow(length = unit(2, 'mm'),
type = "open")
) +
geom_node_text( aes(label = name), size = 2.3) +
ggplot2::theme_void() +
theme(legend.position="bottom") +
labs(edge_color = "",
color = "",
edge_linetype = "") +
scale_color_viridis_d(option = "inferno",
begin = 1,
end = .5,
direction = -1) +
scale_edge_color_viridis(discrete = TRUE,
option = "inferno",
begin = 1,
end = .5,
direction = -1)
}literature_long %<>%
mutate(author_is_man = author_gender == "M")
bro_lit <- literature_long %>%
filter(author_is_man) %>%
distinct(to, from) %>%
review()
bro_lit## A netlit_review object with the following components:
##
## $edgelist
## - 61 edges
## - edge attributes: edge_betweenness
## $nodelist
## - 52 nodes
## - node attributes: degree_in, degree_out, degree_total, betweenness
## $graph
## an igraph object
set.seed(5)
#netlit_plot(bro_lit$graph)
netlit_bias_plot(bro_lit)
missing_nodes <- lit$nodelist$node[!lit$nodelist$node %in% bro_lit$nodelist$node]
missing_from <- lit$edgelist$from[!lit$edgelist$from %in% bro_lit$edgelist$from]
missing_to <- lit$edgelist$to[!lit$edgelist$to %in% bro_lit$edgelist$to]Nodes missing: Instability, Legislative Outcomes, Personal Vote, Efficiency Principle
# biased sample weights
literature_long %<>%
mutate(unbiased = .5,
weight = case_when(
author_is_man ~ .6,
!author_is_man ~ .4,
TRUE~ .5
))
# a function to sample the network
sample_lit <- function(n, literature_long, prob){
# create an index for the sample
samp_idx <- sample(seq_len(nrow(literature_long)),
100, # 100 draws = number of studies to draw
prob=prob # with prob var provided
)
# subset sample to index
sample <- literature_long %>%
rowid_to_column() %>%
filter(rowid %in% samp_idx) %>%
distinct(to, from) %>%
review()
return(sample)
}
# TEST
# random_samples <- sample_lit(literature_long, prob = literature_long$unbiased)
# gender_samples <- sample_lit(literature_long, prob = literature_long$weight)There are 165 studies in the original literature review.
random_samples <- map(1:1000, # 100 samples
sample_lit,
literature_long=literature_long,
prob = literature_long$unbiased)
# make a table of the total number of nodes, edges, and the graph object for plotting
random <- tibble(
nodes = random_samples %>% map(1) %>% modify(nrow) %>% unlist(),
edges = random_samples %>% map(2) %>% modify(nrow) %>% unlist(),
graph = random_samples %>% map(3),
sample = "Random"
)
# map(random$graph, netlit_plot)
map(random_samples[1:10], netlit_bias_plot) Average nodes recovered: 46.799
Average edges recovered: 43.95
# biased samples
gender_samples <- map(1:1000, sample_lit,literature_long=literature_long, prob = literature_long$weight)
gender <- tibble(
nodes = gender_samples %>% map(1) %>% modify(nrow) %>% unlist(),
edges = gender_samples %>% map(2) %>% modify(nrow) %>% unlist(),
graph = gender_samples %>% map(3),
sample = "Gender bias favoring men"
)
# map(gender_samples[1:10], netlit_bias_plot)
map(gender_samples[1:10], netlit_bias_plot)Average nodes recovered: 47.472
Average edges recovered: 44.25
# biased sample weights
literature_long %<>%
mutate(weight = case_when(
author_is_man ~ 1,
!author_is_man ~ .3,
TRUE~ .5
))
# biased samples
gender_samples <- map(1:1000, sample_lit,literature_long=literature_long, prob = literature_long$weight)
gender <- tibble(
nodes = gender_samples %>% map(1) %>% modify(nrow) %>% unlist(),
edges = gender_samples %>% map(2) %>% modify(nrow) %>% unlist(),
graph = gender_samples %>% map(3),
sample = "Gender bias favoring men"
)
#map(gender$graph, netlit_plot)
map(gender_samples[1:10], netlit_bias_plot)Average nodes recovered: 48.951
Average edges recovered: 45.339
# biased sample weights
literature_long %<>%
mutate(weight = case_when(
author_is_man ~ .3,
!author_is_man ~ 1,
TRUE~ .5
))
# biased samples
gender_samples2 <- map(1:1000, sample_lit,literature_long=literature_long, prob = literature_long$weight)
gender2 <- tibble(
nodes = gender_samples2 %>% map(1) %>% modify(nrow) %>% unlist(),
edges = gender_samples2 %>% map(2) %>% modify(nrow) %>% unlist(),
graph = gender_samples2 %>% map(3),
sample = "Gender bias favoring women"
)
#map(gender$graph, netlit_plot)
map(gender_samples2[1:10], netlit_bias_plot)Average nodes recovered: 44.627
Average edges recovered: 42.591
(replacing NA HIndex with 0)
literature_long %<>%
mutate(author_h_index = replace_na(author_h_index, 0 ))
# biased samples
hindex_samples <- map(1:1000, sample_lit,literature_long=literature_long, prob = literature_long$author_h_index)
hindex <- tibble(
nodes = hindex_samples %>% map(1) %>% modify(nrow) %>% unlist(),
edges = hindex_samples %>% map(2) %>% modify(nrow) %>% unlist(),
graph = hindex_samples %>% map(3),
sample = "H-Index bias"
)
#map(gender$graph, netlit_plot)
map(hindex_samples[1:10], netlit_bias_plot)Average nodes recovered: 51.78
Average edges recovered: 47.145
(replacing NA author citations with 0)
literature_long %<>%
mutate(author_citations = replace_na(author_citations, 0 ))
# gender-biased samples
citations_samples <- map(1:1000, sample_lit,literature_long=literature_long, prob = literature_long$author_citations)
citations <- tibble(
nodes = citations_samples %>% map(1) %>% modify(nrow) %>% unlist(),
edges = citations_samples %>% map(2) %>% modify(nrow) %>% unlist(),
graph = citations_samples %>% map(3),
sample = "Citations bias"
)
# map(citations$graph, netlit_plot)
map(citations_samples[1:10], netlit_bias_plot) # %>% .[c(1:10)]Average nodes recovered: 51.905
Average edges recovered: 46.811
s <- full_join(random, gender) %>%
full_join(gender2) %>%
full_join(hindex) %>%
full_join(citations)
s %>%
ggplot() +
aes(x = nodes, fill = sample, color = sample) +
geom_density(alpha = .3) +
scale_color_viridis_d() +
scale_fill_viridis_d() +
theme_minimal() +
labs(color = "",
fill = "",
x = "Nodes Recovered (out of 56)") +
theme(axis.text.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
s %>%
ggplot() +
aes(x = edges, fill = sample, color = sample) +
geom_density(alpha = .3) +
scale_color_viridis_d() +
scale_fill_viridis_d() +
theme_minimal() +
labs(color = "",
fill = "",
x = "Edges Recovered (out of 69)") +
theme(axis.text.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())